{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# tell jupyter to display images in the notebook\n",
    "%matplotlib inline\n",
    "\n",
    "# for pretty printing\n",
    "import pprint\n",
    "\n",
    "# To suppress FutureWarnings\n",
    "import warnings\n",
    "warnings.simplefilter(action='ignore', category=FutureWarning)\n",
    "\n",
    "# To increase the number of rows pandas will print before truncation\n",
    "pd.set_option('display.max_rows', 500)\n",
    "\n",
    "# To increase the number of columns pandas will print before truncation\n",
    "pd.set_option('display.max_columns', 100)\n",
    "\n",
    "# To increase the width of each column pandas will print before truncating.\n",
    "pd.set_option('display.max_colwidth',100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Part 1: College Football Dataset and total_score\n",
    "\n",
    "Fall is college football season, so we'll first look at some data from the 2017 college football season.  \n",
    "This data was gathered from 'http://sports.snoozle.net/'.\n",
    "\n",
    "We'd like to find the total score for each game and the min, max and mean of the total scores.\n",
    "\n",
    "In this dataset, each row contains information about a single game, visiting team info followed by home team.  \n",
    "We'll need to combine two columns to find the total number of points.\n",
    "\n",
    "This csv contains columns with duplicate names.\n",
    "When there are columns with identical names, pandas attaches a suffix to discriminate them.  \n",
    "All 'Home Team' columns will have a '.1' suffix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the data from ../data/cfb_2017.csv into the variable df_cfb\n",
    "df_cfb = pd.read_csv('../data/cfb_2017.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Vis Team</th>\n",
       "      <th>Rushing Yards</th>\n",
       "      <th>Rushing Attempts</th>\n",
       "      <th>Passing Yards</th>\n",
       "      <th>Passing Attempts</th>\n",
       "      <th>Passing Completions</th>\n",
       "      <th>Penalties</th>\n",
       "      <th>Penalty Yards</th>\n",
       "      <th>Fumbles Lost</th>\n",
       "      <th>Interceptions Thrown</th>\n",
       "      <th>1st Downs</th>\n",
       "      <th>3rd Down Attempts</th>\n",
       "      <th>3rd Down Conversions</th>\n",
       "      <th>4th Down Attempts</th>\n",
       "      <th>4th Down conversions</th>\n",
       "      <th>Time of Possession</th>\n",
       "      <th>Score</th>\n",
       "      <th>Home Team</th>\n",
       "      <th>Rushing Yards.1</th>\n",
       "      <th>Rushing Attempts.1</th>\n",
       "      <th>Passing Yards.1</th>\n",
       "      <th>Passing Attempts.1</th>\n",
       "      <th>Passing Completions.1</th>\n",
       "      <th>Penalties.1</th>\n",
       "      <th>Penalty Yards.1</th>\n",
       "      <th>Fumbles Lost.1</th>\n",
       "      <th>Interceptions Thrown.1</th>\n",
       "      <th>1st Downs.1</th>\n",
       "      <th>3rd Down Attempts.1</th>\n",
       "      <th>3rd Down Conversions.1</th>\n",
       "      <th>4th Down Attempts.1</th>\n",
       "      <th>4th Down conversions.1</th>\n",
       "      <th>Time of Possession.1</th>\n",
       "      <th>Score.1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2017-08-26</td>\n",
       "      <td>Portland State</td>\n",
       "      <td>86</td>\n",
       "      <td>36</td>\n",
       "      <td>134</td>\n",
       "      <td>36</td>\n",
       "      <td>16</td>\n",
       "      <td>7</td>\n",
       "      <td>70</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>14</td>\n",
       "      <td>19</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1821</td>\n",
       "      <td>6</td>\n",
       "      <td>BYU</td>\n",
       "      <td>171</td>\n",
       "      <td>35</td>\n",
       "      <td>194</td>\n",
       "      <td>27</td>\n",
       "      <td>16</td>\n",
       "      <td>7</td>\n",
       "      <td>70</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1779</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2017-08-26</td>\n",
       "      <td>USF</td>\n",
       "      <td>315</td>\n",
       "      <td>74</td>\n",
       "      <td>233</td>\n",
       "      <td>26</td>\n",
       "      <td>13</td>\n",
       "      <td>10</td>\n",
       "      <td>57</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>29</td>\n",
       "      <td>19</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2190</td>\n",
       "      <td>42</td>\n",
       "      <td>San Jose State</td>\n",
       "      <td>109</td>\n",
       "      <td>38</td>\n",
       "      <td>278</td>\n",
       "      <td>50</td>\n",
       "      <td>25</td>\n",
       "      <td>9</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>18</td>\n",
       "      <td>21</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1410</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2017-08-26</td>\n",
       "      <td>Oregon State</td>\n",
       "      <td>152</td>\n",
       "      <td>29</td>\n",
       "      <td>304</td>\n",
       "      <td>47</td>\n",
       "      <td>27</td>\n",
       "      <td>3</td>\n",
       "      <td>21</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>23</td>\n",
       "      <td>16</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1541</td>\n",
       "      <td>27</td>\n",
       "      <td>Colorado State</td>\n",
       "      <td>191</td>\n",
       "      <td>43</td>\n",
       "      <td>334</td>\n",
       "      <td>39</td>\n",
       "      <td>26</td>\n",
       "      <td>8</td>\n",
       "      <td>40</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>14</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2059</td>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date        Vis Team  Rushing Yards  Rushing Attempts  Passing Yards  \\\n",
       "0  2017-08-26  Portland State             86                36            134   \n",
       "1  2017-08-26             USF            315                74            233   \n",
       "2  2017-08-26    Oregon State            152                29            304   \n",
       "\n",
       "   Passing Attempts  Passing Completions  Penalties  Penalty Yards  \\\n",
       "0                36                   16          7             70   \n",
       "1                26                   13         10             57   \n",
       "2                47                   27          3             21   \n",
       "\n",
       "   Fumbles Lost  Interceptions Thrown  1st Downs  3rd Down Attempts  \\\n",
       "0             0                     1         14                 19   \n",
       "1             0                     0         29                 19   \n",
       "2             2                     3         23                 16   \n",
       "\n",
       "   3rd Down Conversions  4th Down Attempts  4th Down conversions  \\\n",
       "0                     7                  4                     2   \n",
       "1                     3                  3                     3   \n",
       "2                    10                  0                     0   \n",
       "\n",
       "   Time of Possession  Score       Home Team  Rushing Yards.1  \\\n",
       "0                1821      6             BYU              171   \n",
       "1                2190     42  San Jose State              109   \n",
       "2                1541     27  Colorado State              191   \n",
       "\n",
       "   Rushing Attempts.1  Passing Yards.1  Passing Attempts.1  \\\n",
       "0                  35              194                  27   \n",
       "1                  38              278                  50   \n",
       "2                  43              334                  39   \n",
       "\n",
       "   Passing Completions.1  Penalties.1  Penalty Yards.1  Fumbles Lost.1  \\\n",
       "0                     16            7               70               0   \n",
       "1                     25            9               64               0   \n",
       "2                     26            8               40               1   \n",
       "\n",
       "   Interceptions Thrown.1  1st Downs.1  3rd Down Attempts.1  \\\n",
       "0                       0           13                   13   \n",
       "1                       3           18                   21   \n",
       "2                       1           31                   14   \n",
       "\n",
       "   3rd Down Conversions.1  4th Down Attempts.1  4th Down conversions.1  \\\n",
       "0                       4                    1                       0   \n",
       "1                       7                    2                       1   \n",
       "2                       8                    1                       1   \n",
       "\n",
       "   Time of Possession.1  Score.1  \n",
       "0                  1779       20  \n",
       "1                  1410       22  \n",
       "2                  2059       58  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Print the first 3 rows of the dataset using head\n",
    "df_cfb.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 999 entries, 0 to 998\n",
      "Data columns (total 35 columns):\n",
      "Date                      999 non-null object\n",
      "Vis Team                  999 non-null object\n",
      "Rushing Yards             999 non-null int64\n",
      "Rushing Attempts          999 non-null int64\n",
      "Passing Yards             999 non-null int64\n",
      "Passing Attempts          999 non-null int64\n",
      "Passing Completions       999 non-null int64\n",
      "Penalties                 999 non-null int64\n",
      "Penalty Yards             999 non-null int64\n",
      "Fumbles Lost              999 non-null int64\n",
      "Interceptions Thrown      999 non-null int64\n",
      "1st Downs                 999 non-null int64\n",
      "3rd Down Attempts         999 non-null int64\n",
      "3rd Down Conversions      999 non-null int64\n",
      "4th Down Attempts         999 non-null int64\n",
      "4th Down conversions      999 non-null int64\n",
      "Time of Possession        999 non-null int64\n",
      "Score                     999 non-null int64\n",
      "Home Team                 999 non-null object\n",
      "Rushing Yards.1           999 non-null int64\n",
      "Rushing Attempts.1        999 non-null int64\n",
      "Passing Yards.1           999 non-null int64\n",
      "Passing Attempts.1        999 non-null int64\n",
      "Passing Completions.1     999 non-null int64\n",
      "Penalties.1               999 non-null int64\n",
      "Penalty Yards.1           999 non-null int64\n",
      "Fumbles Lost.1            999 non-null int64\n",
      "Interceptions Thrown.1    999 non-null int64\n",
      "1st Downs.1               999 non-null int64\n",
      "3rd Down Attempts.1       999 non-null int64\n",
      "3rd Down Conversions.1    999 non-null int64\n",
      "4th Down Attempts.1       999 non-null int64\n",
      "4th Down conversions.1    999 non-null int64\n",
      "Time of Possession.1      999 non-null int64\n",
      "Score.1                   999 non-null int64\n",
      "dtypes: int64(32), object(3)\n",
      "memory usage: 273.2+ KB\n"
     ]
    }
   ],
   "source": [
    "# print out information about the dataframe using .info()\n",
    "df_cfb.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dataframe has 999 records\n"
     ]
    }
   ],
   "source": [
    "# How may records are in the dataset?\n",
    "print('dataframe has {} records'.format(len(df_cfb)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "there are 0 missing scores\n"
     ]
    }
   ],
   "source": [
    "# How many values are missing from the Score and Score.1 columns?\n",
    "print('there are {} missing scores'.format(sum(df_cfb.Score.isnull())+sum(df_cfb.loc[:,'Score.1'].isnull())))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the mean visiting team score is 24.87\n"
     ]
    }
   ],
   "source": [
    "# What is the average (mean) score for visiting teams?\n",
    "print('the mean visiting team score is {:0.2f}'.format(df_cfb.Score.mean()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# To examine the total score, we need to combine Score and Score.1\n",
    "# Create a new column called 'total_score' which is the sum of the 'Score' and 'Score.1' columns\n",
    "df_cfb['total_score'] = df_cfb.Score + df_cfb['Score.1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the mean total score is 55.98\n"
     ]
    }
   ],
   "source": [
    "# What is the average (mean) total score?\n",
    "print('the mean total score is {:0.2f}'.format(df_cfb.total_score.mean()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    999.000000\n",
       "mean      55.977978\n",
       "std       18.671603\n",
       "min        6.000000\n",
       "25%       43.000000\n",
       "50%       55.000000\n",
       "75%       67.000000\n",
       "max      139.000000\n",
       "Name: total_score, dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Use .describe to show other values associated with total score\n",
    "df_cfb.total_score.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the minimum and maximum values are 6 and 139\n"
     ]
    }
   ],
   "source": [
    "# what are the min and max values for total_score?\n",
    "print('the minimum and maximum values are {} and {}'.format(6,139))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Vis Team</th>\n",
       "      <th>Score</th>\n",
       "      <th>Home Team</th>\n",
       "      <th>Score.1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>354</th>\n",
       "      <td>W Michigan</td>\n",
       "      <td>71</td>\n",
       "      <td>Buffalo</td>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Vis Team  Score Home Team  Score.1\n",
       "354  W Michigan     71   Buffalo       68"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# What were the team names and team scores for the game with the highest total score?\n",
    "# We should see a single row with the columns: Vis Team, Score, Home Team, Score.1\n",
    "df_cfb[df_cfb.total_score == df_cfb.total_score.max()][['Vis Team','Score','Home Team','Score.1']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAELCAYAAAAybErdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl8XNV99/HPT7tk2dptyZJteQW8AF6w2UMWwBCCWcMWIC0pfdImfRqa9IHmlaSlSRva50kasjWkpBASFsdAMMRgwOxgjOV9t+VVmy3ZkrXvc54/5sqVtY5tSXek+b5fL73m3jvn3vnNlWZ+uuece4455xAREeksyu8AREQk/Cg5iIhIN0oOIiLSjZKDiIh0o+QgIiLdKDmIiEg3Sg4iItKNkoOIiHSj5CAiIt3E+B3AqcjMzHT5+fl+hyEiMqysW7fuqHMu61T2GVbJIT8/n4KCAr/DEBEZVszs4Knuo2olERHpRslBRES6UXIQEZFulBxERKQbJQcREelGyUFERLpRchARkW6UHEREpBslBxER6WZY3SEt0uHpNYf6LXPnoolDEInIyKQrBxER6Sak5GBmi81sl5kVmtmDPTwfb2bPec+vMbN8b/uVZrbOzLZ4j5/ptM873jE3ej9jB+pNiYjImem3WsnMooGfA1cCxcBaM1vunNveqdh9QJVzbpqZ3Q48AtwGHAW+4JwrNbPZwEogt9N+dznnNJKeiEiYCeXKYSFQ6Jzb55xrAZ4FlnQpswR40lteBnzWzMw5t8E5V+pt3wYkmFn8QAQuIiKDJ5TkkAsUdVov5uT//k8q45xrA6qBjC5lbgY2OOeaO237b69K6TtmZqcUuYiIDJpQkkNPX9ruVMqY2SyCVU1/2en5u5xzc4DLvJ+7e3xxs/vNrMDMCioqKkIIV0REzlQoyaEYmNBpPQ8o7a2MmcUAKUClt54HvAjc45zb27GDc67Ee6wFniZYfdWNc+4x59wC59yCrKxTmshIREROUyjJYS0w3cwmm1kccDuwvEuZ5cC93vItwFvOOWdmqcCfgIeccx92FDazGDPL9JZjgeuArWf2VkREZKD0mxy8NoSvEexptANY6pzbZmYPm9n1XrHHgQwzKwQeADq6u34NmAZ8p0uX1XhgpZltBjYCJcCvB/KNiYjI6QvpDmnn3ApgRZdt3+203ATc2sN+3we+38th54cepoiIDCXdIS0iIt0oOYiISDdKDiIi0o2Sg4iIdKPkICIi3Sg5iIhIN0oOIiLSjZKDiIh0o+QgIiLdKDmIiEg3Sg4iItKNkoOIiHSj5CAiIt0oOYiISDdKDiIi0o2Sg4iIdKPkICIi3Sg5iIhIN0oOIiLSjZKDiIh0o+QgIiLdxPgdgESWp9cc6rfMnYsmDkEkItIXXTmIiEg3unKQYaWmqZX3dlfw/PpiymuayEyOJzslgXPzUklJjPU7PJERQ8lBwk5PVU/OOdYdrOLVrYdpbG0nITaK7DEJ7K2oY0PRcd7eVc71543nvLxUzMyHqEVGFiUHCXs1ja08V1DE/qP1TMpI4uqZ2UxITyI6KpgEjtY2s2x9MUsLitl9pI5b5ucRpQQhckaUHCSsHa5p4smPDtDY0s6Nc3OZPymt2xd/5uh47r98Cm/tLOetneWMTojhmtk5PkUsMjIoOUjY2ne0jqdWHyQuJor7L5/C+NTEXstGmfG5c8ZR39zG+3uOMnZ0vHo9iZwB9VaSsHTwWD1PfnSAMYmxfPVTU/tMDJ1dd+54pmUl88cNpWwuPj7IUYqMXEoOEnaKqxp44qMDpCTG8pVLJ5OaFBfyvtFRxh0LJ5IUH823X9xKe8ANYqQiI5eSg4SVY3XNPPHRAZLiornv0imMTjj17qmJcdF8fk4OW0qqeXrNwUGIUmTkCyk5mNliM9tlZoVm9mAPz8eb2XPe82vMLN/bfqWZrTOzLd7jZzrtM9/bXmhmj5r6H0a8xpZ2nlwd/DL/80smn9F9C3NyU7h0Wib/tnIXFbXNAxWiSMToNzmYWTTwc+AaYCZwh5nN7FLsPqDKOTcN+DHwiLf9KPAF59wc4F7gqU77/BK4H5ju/Sw+g/chw1x7wPHMJ4eoqm/hrkWTyEiOP6PjmRkPL5lFc2uAR17bOUBRikSOUK4cFgKFzrl9zrkW4FlgSZcyS4AnveVlwGfNzJxzG5xzpd72bUCCd5WRA4xxzq12zjngt8ANZ/xuZNh6ffthCivquGFuLpMzRw3IMadkJXPPRZN4YX0x+yrqBuSYIpEilOSQCxR1Wi/2tvVYxjnXBlQDGV3K3AxscM41e+WL+zmmRIhdh2t5f89RFk5OZ/6ktAE99l9+aipxMVH89K3CAT2uyEgXSnLoqS2gaxeQPsuY2SyCVU1/eQrH7Nj3fjMrMLOCioqKEMKV4aSmsZU/rCsie0wCn58z8DeuZY2O556L8nlpYwl7dfUgErJQkkMxMKHTeh5Q2lsZM4sBUoBKbz0PeBG4xzm3t1P5vH6OCYBz7jHn3ALn3IKsrKwQwpXhwjnHsvXFtLYHuP2CCcRGD07nufsvn0J8TDSPrtozKMcXGYlC+TSuBaab2WQziwNuB5Z3KbOcYIMzwC3AW845Z2apwJ+Ah5xzH3YUds6VAbVmdqHXS+ke4KUzfC8yzGwoOk5heR3XzM5h7JiEQXudzOR47r04n+WbStl/tH7QXkdkJOl3+AznXJuZfQ1YCUQDv3HObTOzh4EC59xy4HHgKTMrJHjFcLu3+9eAacB3zOw73rarnHPlwFeBJ4BE4FXvRyJEXXMbK7aUMTE9iYWT0wf99f780nx+88F+Hv9gH9+/Yc6J7Zp8SKRnIY2t5JxbAazosu27nZabgFt72O/7wPd7OWYBMPtUgpWR49UtZTS3Brhxbu6QjKA6dnQCN87NZdm6Yh648izSR4V+17VIJNId0jLk9h+tZ0PRcS6bkcm4QaxO6uorl02mqTXA7z7WXdMi/VFykCHlnOO1rWWMSYjhihljh/S1p48bzafPyuK3qw/Q1No+pK8tMtwoOciQ2lZaQ1FVI587ZxxxMUP/5/cXl03haF0Lf9xQMuSvLTKcKDnIkGltD/D69sOMHR3P3IkDe7NbqC6amsHZ2aP57eqDBG/OF5GeKDnIkFlaUMTRuhaunpV9YorPoWZm3H3RJLaX1bD+kOZ7EOmNkoMMidb2AL94ey8T0hI5O3u0r7HccH4uyfExapgW6YOSgwyJVzaXUnK8kSvOGovfo7OPio/h5nm5/GlzGXXNbb7GIhKulBxk0AUCjl++s5cZ45I5y+erhg53XzSJlvYA6w5U+h2KSFhScpBB9/aucnYfqeOrV0wdkhveQjFt7GgumpLBmgOVBNQwLdKNkoMMul++s5fc1ESuO3e836Gc5O6LJnG8oZXdh2v9DkUk7Cg5yKDaXHycgoNV3Hfp5EEbdfV0XTlzHKMTYvh4/zG/QxEJO+H1aZUR56nVB0mKi+aWBXn9Fx5isdFRXJCfzu4jdRyr0zzTIp0pOcigqapvYfmmUm6Ym8uYhFi/w+nRwvx0ogw+2a+GaZHOlBxk0PxhXRHNbQHuuWiS36H0akxiLDNzxlBwsIrW9oDf4YiEDSUHGRSBgON3Hx9iYX46Z2eP8TucPi2akkFjaztbiqv9DkUkbCg5yKB4d08Fhyob+FIYXzV0mJI5iszkOD7RPQ8iJyg5yKBYuraI9FFxLJ6V7Xco/TIzLshP51BlA4drmvwORyQsKDnIgDtW18ybO45w49xcX4blPh3zJqYRHWWsVcO0CKDkIIPgxQ0ltLY7vrhggt+hhGxUfAyzxo9hQ1EVLW1qmBZRcpAB5ZxjaUER501IDZtxlEK1cHI6Ta0BtpaoYVpEyUEG1ObianYfqeOLYXjTW38mZ4wiMzleDdMiKDnIAFtaUERCbBRfOC+8xlEKhZmxMD8t2DBdrYZpiWxKDjJgmlrbeXlTKYtnZYftHdH9mes1TOvqQSKdkoMMmLd3llPT1MZN84ZflVKHUfExzB4/ho1qmJYIp+QgA+aFDSWMHR3PJdMy/Q7ljCycnEFTa4AtapiWCKbkIAOisr6Fd3aVs+T88URHhceEPqcrPyOJrOR4PtFQ3hLBlBxkQLyyuZTWdseNc4dvlVIHM+OCyekUVTVSVt3odzgivlBykAHxwvoSzs4ezczx4T3IXqjmTUglJspYq4ZpiVBKDnLG9h+tZ2PRcW6cm+t3KAMmKT6G2bkpbDh0nIaWNr/DERlySg5yxpZvLMUMrj9/+N3b0JcL8tNpbgvwyuYyv0MRGXJKDnJGnHMs31TCBfnp5KQk+h3OgMrPSCJrdDxPrznkdygiQy6k5GBmi81sl5kVmtmDPTwfb2bPec+vMbN8b3uGmb1tZnVm9rMu+7zjHXOj9zN2IN6QDK3tZTXsrajn+mF4R3R/gndMp7Ox6DjbS2v8DkdkSPWbHMwsGvg5cA0wE7jDzGZ2KXYfUOWcmwb8GHjE294EfAf4Zi+Hv8s5d773U346b0D8tXxTKTFRxrVzcvwOZVDMnZhKXEwUz3yiqweJLKFcOSwECp1z+5xzLcCzwJIuZZYAT3rLy4DPmpk55+qdcx8QTBIywgQCjlc2lXHp9EzSR8X5Hc6gSIqL4fNzcvjjhhI1TEtEiQmhTC5Q1Gm9GFjUWxnnXJuZVQMZwNF+jv3fZtYOPA983znnQopawsL6Q1WUHG/k766a4XcoPRqotoI7F03kxQ0lvLKpjC9eMHzmqBA5E6FcOfR0u2vXL/FQynR1l3NuDnCZ93N3jy9udr+ZFZhZQUVFRb/BytBZvqmU+JgorhoGU4GeiQWT0pg2Npnfq2pJIkgoyaEY6PzvUh5Q2lsZM4sBUoA+7x5yzpV4j7XA0wSrr3oq95hzboFzbkFWVlYI4cpQaGsPsGJLGZ89ZyzJ8aFcgA5fZsadCyeyqeg420o13pJEhlCSw1pguplNNrM44HZgeZcyy4F7veVbgLf6qiIysxgzy/SWY4HrgK2nGrz456O9xzha18L1542cG9/6ctO8XDVMS0TpNzk459qArwErgR3AUufcNjN72Myu94o9DmSYWSHwAHCiu6uZHQB+BHzZzIq9nk7xwEoz2wxsBEqAXw/c25LBtnxTKaPjY7jirMi4mktNiuO6c3N4cX0JtU2tfocjMuhCqg9wzq0AVnTZ9t1Oy03Arb3sm9/LYeeHFqKEm6bWdlZuPczVs7NJiI32O5whc+9F+bywvoRl64r5s0sm+x2OyKAa2ZXFMije2VVBbXNbtxvfRvqdxOdNSGXexFSe/OgA916UT9QwH5pcpC8aPkNO2fJNJWQmx3Hx1Ay/QxlyX75kMgeONfDubvWck5FNyUFOSW1TK6t2lHPtnBxioiPvz+ea2dmMGxPPf390wO9QRAZV5H265Yy8sf0IzW2BETmWUihio6P40qJJvLe7gsLyOr/DERk0anOQkHS0Jzzx0X5SE2PZebiW3Uci88vxjkUT+elbhfx29QEeXjLb73BEBoWuHCRk9c1tFJbXcW5eClEWuY2xmcnxfOG88SxbV0yNurXKCKUrBwnZ1tJqAg7OzUv1O5Qh1VMvrOyUBBpa2nno+S1cMi0zpOPcuWjiQIcmMmh05SAh21RUTdboeHJSEvwOxXe5qYlMykhi9b5jBDRepIxASg4SkurGVg4eq+e8vBQsgquUOrt4aiaV9S3sPlzrdygiA07VShLSzWtbSqpxRF6VUl9m5owhJTGWj/Ye4+ycMX6HIzKgdOUgIdlSfJzxKQlkJsf7HUrYiI4yFk1Op7CijiM1ms9KRhYlB+lXVUMLRVWNzMlN8TuUsHNBfjoxUcbqvcf8DkVkQCk5SL+2lgTnMJit5NDNqPgYzp+QyoaiKk0jKiOKkoP0a0tJNbmpiWSoSqlHF0/NpLXdUXCgyu9QRAaMkoP0qaq+hWJVKfUpOyWBKZmjWL3vGO0BdWuVkUHJQfq0RVVKIbl4aibVja3sKKvxOxSRAaHkIH3aUlJNXloi6aPi/A4lrJ2dM5q0pFg+2nvU71BEBoSSg/Sqsr6FkuONzB6vq4b+RJlx0ZQMDhxroPR4o9/hiJwxJQfpVUcvJbU3hGb+pHTioqN09SAjgpKD9KqjSilNVUohSYyLZu7EVDYVV1Or0VplmFNykB4dq2um5Lh6KZ2qi6dm0h5wrFW3VhnmlBykR7rx7fRkjY5nWlYyaw9UarRWGdaUHKRHW0qqmZCWSFqSqpRO1cLJ6VQ3trJLo7XKMKbkIN0crWumtLqJORqB9bSckzOGMQkxrNmv8ZZk+FJykG5OVCmN1zDUpyM6yliQn86eI3VU1rf4HY7IaVFykG62lFQzMT2JVFUpnbYL8tMxg0/2V/odishpUXKQk1TUNlNW3aReSmcoJTGWs7PHsO5gpcZbkmFJyUFOorGUBs6C/DTqW9rZdVjjLcnwo+QgJ9laUs2k9CRSEmP9DmXYmz52NKMTYlh3UPc8yPCj5CAnlNc2cbimiTl5umoYCNFRxtwJaew6Uqs7pmXYUXKQE7aWVGPALA20N2DmT0oj4GDDoeN+hyJySpQc5IRtpTVMUJXSgMoaHc+k9CTWHazC6Y5pGUZCSg5mttjMdplZoZk92MPz8Wb2nPf8GjPL97ZnmNnbZlZnZj/rss98M9vi7fOomdlAvCE5PZX1LZRVNzFL9zYMuPmT0qioa2a9rh5kGOk3OZhZNPBz4BpgJnCHmc3sUuw+oMo5Nw34MfCIt70J+A7wzR4O/UvgfmC697P4dN6ADIztpcFeSqpSGnhzclOIjTaWrSvyOxSRkIVy5bAQKHTO7XPOtQDPAku6lFkCPOktLwM+a2bmnKt3zn1AMEmcYGY5wBjn3GoXvNb+LXDDmbwROTPbSmvISUnQjG+DID42mjm5qby8qYyGlja/wxEJSSjJIRfo/C9PsbetxzLOuTagGsjo55jF/RwTADO738wKzKygoqIihHDlVNU2tXKosoGZqlIaNPMnpVHX3MarWw77HYpISEJJDj21BXRtWQulzGmVd8495pxb4JxbkJWV1cch5XRtL6vBoSqlwZSfkUR+RhJLC1S1JMNDKMmhGJjQaT0PKO2tjJnFAClAX4PKFHvH6euYMkS2l9aQMSqOcaPj/Q5lxDIzbl0wgTX7Kzl4rN7vcET6FUpyWAtMN7PJZhYH3A4s71JmOXCvt3wL8Jbro9+ec64MqDWzC71eSvcAL51y9HLGqhtb2VtRx6zxY1CHscF187w8ogz+UFDcf2ERn/WbHLw2hK8BK4EdwFLn3DYze9jMrveKPQ5kmFkh8ABworurmR0AfgR82cyKO/V0+irwX0AhsBd4dWDekpyKt3YeIeBgpqqUBl12SgKXTc/ixQ0lBDQYn4S5mFAKOedWACu6bPtup+Um4NZe9s3vZXsBMDvUQGVwrNx6hDEJMeSlJfodSkS4eX4ef/PMBj7ed4yLp2X6HY5Ir3SHdARrbGnnnd3lzBw/hihVKQ2Jq2aOY3R8DM+vL/E7FJE+KTlEsHd3V9DUGmBmjqqUhkpCbDSfPzeHV7eWUd+sex4kfCk5RLDXtx0mNSmWyZmj/A4lotw8P4+GlnZe26p7HiR8KTlEqNb2AG/uOMJnzx5HdJSqlIbSgklpTMpI4vn16rUk4UvJIUJ9vO8YNU1tLJ6d7XcoEcfMuGluHqv3HaPkeKPf4Yj0SMkhQr229TBJcdFcNl09Zvxw07xcnIM/blDDtIQnJYcIFAg4Xt9+hCvOyiIhNtrvcCLShPQkFk5O5/l1xZrnQcKSkkME2lBURUVtM1fPUpWSn26Zl8e+o/VsKNI8DxJ+lBwi0GtbDxMbbXz67LF+hxLRrpmTTUJsFC+oYVrCkJJDhHHOsXLbES6ZlsmYBE0H6qfRCbFcPSublzeV0dzW7nc4IicJafgMGd6eXnPoxHJZdSOHKhuYPyntpO3ij5vn5fHSxlLe3F7O58/N8TsckRN05RBhtpXWYMA5OZrYJxxcMi2TnJQEzfMgYUfJIcJsL61hUsYokuN10RgOoqOMW+bn8d6eCkp1z4OEESWHCHKsrpnDNU3M0nSgYeWLCybgHCxbp4ZpCR9KDhFkW2kNgOaKDjMT0pO4ZFoGSwuKNM+DhA0lhwiyrbSa3NRE0pLi/A5FuvjiggkUVzXy0d5jfociAig5RIzqxlaKqhp11RCmrp6VTUpiLM+uVQ8yCQ9KDhFiR1mwSmmWeimFpYTYaG6cm8vKbYc5WtfsdzgiSg6RYltpNVnJ8Ywdk+B3KNKLuy+aRGu747m16tYq/lNyiAANLW3sP1qvKqUwNzUrmUumZfD7jw/S1h7wOxyJcEoOEWB7aQ0BB7PHazrQcHf3hfmUVjexame536FIhFNyiABbS6tJS4plfKqqlMLd584Zy/iUBH738UG/Q5EIp9tkR7jjDS0Ultdx6bRMzDQdqJ9CGcvqzkUTuXPRRP7v67spLK9j2tjkIYhMpDtdOYxwr28/EqxSylWV0nBxx8KJxMdE8fgH+/wORSKYksMIt2JLGWlJseSmJvodioQoIzmeWxfk8fy6Esprm/wORyKUksMIVt3QyoeFR5mdm6IqpWHmK5dOoS0Q4IkPD/gdikQoJYcR7PXth2ltd8xRldKwk585imtm5/DUxwepa27zOxyJQEoOI9irWw+Tm5qoKqVh6i8/NYXapjaeXqOeSzL0lBxGqOrGVt7fU8G1c7JVpTRMnZuXyqXTMvnVu/uo19WDDDF1ZR2h3tx+hNZ2x7VzcthRVut3OBKirt1d5+Sm8EHhUf72uY18+qyxQLC7q8hg05XDCLViSxm5qYmcPyHV71DkDExIT+Kc7NG8v6eCxpZ2v8ORCBJScjCzxWa2y8wKzezBHp6PN7PnvOfXmFl+p+ce8rbvMrOrO20/YGZbzGyjmRUMxJuRoJqmVt7fc5RrZqtKaST43MxxNLcGeG9Phd+hSATpNzmYWTTwc+AaYCZwh5nN7FLsPqDKOTcN+DHwiLfvTOB2YBawGPiFd7wOn3bOne+cW3DG70ROeHP7EVraA1x7bo7focgAyElJZE5eCh/tPUp1Y6vf4UiECOXKYSFQ6Jzb55xrAZ4FlnQpswR40lteBnzWgv+yLgGedc41O+f2A4Xe8WQQrdhSxviUBOaqSmnEuHpmNs4Ff7ciQyGU5JALdB5gvtjb1mMZ51wbUA1k9LOvA143s3Vmdv+phy49Od7Qwnu7j3LtnBxVKY0gaaPi+NRZWWwpqebDwqN+hyMRIJTk0NM3TNdZ0Hsr09e+lzjn5hGsrvprM7u8xxc3u9/MCsysoKJCda79eWVzGS3tAW6Y2zV/y3B3+fQs0kfF8b3l22hp03wPMrhCSQ7FwIRO63lAaW9lzCwGSAEq+9rXOdfxWA68SC/VTc65x5xzC5xzC7KyskIIN7K9uKGEGeOSmaWJfUac2OgoPj8nh8LyOn79vgblk8EVSnJYC0w3s8lmFkewgXl5lzLLgXu95VuAt5xzztt+u9ebaTIwHfjEzEaZ2WgAMxsFXAVsPfO3E9kOHK1n3cEqbpybpyqlEeqcnDF8fk4O//Hm7hPzgosMhn6Tg9eG8DVgJbADWOqc22ZmD5vZ9V6xx4EMMysEHgAe9PbdBiwFtgOvAX/tnGsHxgEfmNkm4BPgT8651wb2rUWeFzeUYAY3zB3vdygyiP75htmkJMbxwNJNql6SQRPSHdLOuRXAii7bvttpuQm4tZd9fwD8oMu2fcB5pxqs9M45x4sbSrh4agY5KRpLaSRLHxXHv940h7/4bQE/WbWbb119tt8hyQikO6RHiIKDVRyqbODGuXl+hyJD4MqZ4/jigjx+8c5e3tfNcTIIlBxGiGc+OcSouGiumZ3tdygyRP7x+llMH5vM3z67kcPVmhRIBpaSwwhQ3dDKnzaXsWRuLqPiNZZipEiKi+EXd82jsbWdrz+zntZ2tT/IwFFyGAGeX19Mc1uAOxdqtM5IM23saP71pjmsPVDF95ZvI9hJUOTM6d/MYc45xzOfHOK8vBRma8a3iNB1WG+AT83I4uk1h6huaOWSaZka1lvOmJLDMFdwsIo95XU8cvMcv0MRH105cxwVtc2s2FJGRnKc3+HICKBqpWHu9x8fJDk+hi+cp3sbIlmUGV9cMIGc1ASeXVvEzsO6QU7OjJLDMFZW3cgrm8u4ZX4eSXG6CIx0cTFR3H1hPgkxUdz3RAEVtc1+hyTDmJLDMPbEhwcIOMd9l072OxQJEymJsdx9YT7H6pv5i98W0NCiuafl9Cg5DFO1Ta08veYQ187JYUJ6kt/hSBjJTUvkJ7fPZXPxcb76u/UaYkNOi5LDMPXc2iJqm9u4//IpfociYejqWdn8y41zeHd3BX/3h00EAuriKqdGFdXDUGt7gN98sJ8Lp6Rzbp5me5PuOrq7Lp6VzcubSimvaeL688afNFqvurtKX5QchqGlBUWUVjfxg5vUfVX6dvmMLOpb2nh/z1GS4mK4cuY4v0OSYULJYZhpam3n0VV7WDApjSu8G59E+rJ4VjaNLe28vaucxLhoLp2W6XdIMgwoOQwzT60+yJGaZh69fa4m9JGQmBk3zM2lqbWdFVvKCAQcl8/QrIrSNzVIDyO1Ta384p1CLp+RxaIpGX6HI8NIlBm3XTCRc/NSeG3bYd7ccUTjMEmflByGkf98dy9VDa1866qz/A5FhqHoqOBd1PMmpvHWznIeemGLurlKr1StNEwUltfy2Hv7uHFuLnPyNMCenJ4oM26al0tKYgzPri1i/9F6fvml+aSP0nhMcjIbTpeWCxYscAUFBX6HMWQ6Gpudc/z6/f0cqWniG1fOIFlzNsgAGBUfzbeWbSY1MZZHbj6XT5891u+QZJCY2Trn3IJT2UfVSsPA+kPHOXCsnsWzspUYZMAsOT+XF//qYtKS4vizJ9by98s2UV6jGeUkSMkhzB1vaGHFljImpicxPz/N73BkhJk1PoXlX7+E//WpqbywvoTL//1tfvjqTiUJUZtDOGsPOJ5dW0TAOW6Zn0eUuq7KIIiPiebBa87mjoUT+PEbu/nVe3v59fv7uGJGFkvm5nLZtEzS1CYRcZQcwtjKbYc5VNnA7RdMIDM53u8CBiPOAAAO7klEQVRwZISblDGK/7h9Lv/7czNYWlDE8+uKWbWzHDOYk5vCuXkpzBqfwuzxKczITiY+JtrvkGUQKTmEqZc2lvBB4VEWTdb4STK0JmeO4v8sPpu/u3IGm4qreW93Bav3HeOlDaX87uNgJ4mYKGP6uNHMHj+GprYAuSkJZKckEhfTc021xnEaftTmMAh+/MbuXp+77Ver+93/je1HeGDpJiZnjqKsupE3dxzpsdw/vLgFgO/8cctJ2zvKdzzfebnj8ZHXdpy0/u0uzz/23t4T6133PZ3H3p7riP1MjjEQcQzV63c97/2dj471f3p564n1rr/vjt9dR5mO1+go1/HY8TvteIT/+Xvs+JvteLztV6uJiY7i317byTeunMFFUzLY9L2rOC8vhZ/fOY/7L59CQ3Mbq3aW8/KmUv7zvX3848vb+Nnbe/inl7eyt6KOtvaT76G45Ier6Krr63aNK9z19Vkf7pQcBsFPVu3p9bk1+yv73PeDPUf566fXM3v8GO65cBKHKht5a2d5n/u0d+mN3F95gOrGkyeB6dqh+cCxhn6PMRC6xj7SdT3vXXU9Hx3rzW3uxHrXMh2rHWU6XqOjXMdjx++04/HpNYdYs7+Sp9cc4ier9pz02LG98/PPri1iU3E11Y2t5KUlcbCygb+7cgYAX1o0CYC46Cia2xyPf7Cff/7Tdp786ABr9h+jsr6FkuPdG7k7PitdPzP9fU7CRV+f9eFO1Uph5Ok1h/juS1uZmpXME3+2kFe3HvY7JJE+dYzvNXP8GADuv3wq//DiFu6+cBJ7ymvZfaSOXRtreXlTKQAPPLeRWeNTSIz7n/YKDR4ZnpQcwkBjSzv/smIHT318kE/NyOLRO+aSkhjrd1gip+2cnDGckzMG5xxl1U1sKanm3d0VvLChhJc2ljJjXDLnTVBbWjhTcvDZqh1H+N7ybRRXNfIXl03mwWvOITpKXVZlZDAzxqcmMj41kXd3V/BXV0xlc3E1m4qPs+NwLQDL1hUDwa7b+tsPH0oOPggEHKt2lvPr9/fxyf5Kpo1N5tn7L+RCjbQqI1xeWhJ5aUksnp3Nvop6fvPhfraVVgNw8Q9Xcf1541lyfq7PUQooOQyarvWo7QFHcVWwIXDe99/geEMruamJfPe6mXzpwkm9dgEUGYmizJg2NhmAf7j2HL63fBtzclN54qMD/Pr9/QD8dNUerpqVzYxxyZq7xAchJQczWwz8BIgG/ss598Muz8cDvwXmA8eA25xzB7znHgLuA9qBv3HOrQzlmMNdTWMrRVUNHKpsoKiygZLjjbR63UbGjU7g+zfMZvGsbGKilRQkssV6n4HPnD2WCyens6W0mpc2lvL/3tjN/3tjN+mj4jgnezTnjB/Dg4vP1mdmiPSbHMwsGvg5cCVQDKw1s+XOue2dit0HVDnnppnZ7cAjwG1mNhO4HZgFjAfeNLMZ3j79HTPsNbe1U1LVyMHKBvZV1FNYXsueI3UA/PC1nQBEmzE+NYGF+elMzBjFM58c4t6L87nu3PF+hi4SlpLiY1g0OYOXNpby4OKz2XG4hh1lNXy8v5IP9x7jmTWHmDsxjfmT0liQn8bciWkajHKQhHJWFwKFzrl9AGb2LLAE6PxFvgT4R295GfAzC14HLgGedc41A/vNrNA7HiEcc8gFAo6G1nbqm9uoa26jobmdmqZWKmqbqaht5mhd8LG0upFDxxooq2mi84jnaUmxTB83GoDrzs0hLzWRnNTEE/8ZATwz1G9KZJgakxjLoskZLJqcQXNrO7vL64iJMgoOVvHTt/YQcBBlkJuWyKT0UUxIT2JSRhIT05NITYwlOSGG5PgYRifEkhQXTZQZHbVTZuAcNLcGaGpr7/bY1NrO69uO0NoeoLXd0doeoK09QEu7oz0QwAEdFV0/en0XUVFGbHQU8TFRxMVEERsdRVx0cDkupvtyrLd+UvkTz1lYVKOFkhxygaJO68XAot7KOOfazKwayPC2f9xl347Wpv6OOWDufnwN+yrqaQsEaA9AeyBAe8ARcNAWCBAIQLtztAf6viMrLiaKrOR4slMSuHBKBhPSg3+IEzOSmJI5igxv/KP8B//ExVM1ibvIQImPjWZObsqJYThqm1rZWHScdQer2FdRz8HKBlZuO0xlfcugx9L1a/vRtwoH/DW6JZMY441vfIqE2KEbz6rfyX7M7FbgaufcV7z1u4GFzrmvdyqzzStT7K3vJXiF8DCw2jn3O2/748AKgndm93nMTse+H7jfWz0L2HX6b/e0ZQJHfXjd/iiuUxOOcYVjTKC4TkU4xgQnxzXJOZd1KjuHcuVQDEzotJ4HlPZSptjMYoAUoLKfffs7JgDOuceAx0KIc9CYWcGpzqI0FBTXqQnHuMIxJlBcpyIcY4IzjyuUZv+1wHQzm2xmcQQbmJd3KbMcuNdbvgV4ywUvSZYDt5tZvJlNBqYDn4R4TBER8Um/Vw5eG8LXgJUEu53+xjm3zcweBgqcc8uBx4GnvAbnSoJf9njllhJsaG4D/to51w7Q0zEH/u2JiMjpCKkPmHNuBcG2gs7bvttpuQm4tZd9fwD8IJRjhjFfq7X6oLhOTTjGFY4xgeI6FeEYE5xhXP02SIuISOTRrYYiItKNkkM/zGyxme0ys0Ize9DHOCaY2dtmtsPMtpnZ//a2p5vZG2a2x3tM8yG2aDPbYGaveOuTzWyNF9NzXqeDoY4p1cyWmdlO75xdFCbn6hve72+rmT1jZgl+nC8z+42ZlZvZ1k7bejw/FvSo9xnYbGbzhjCmf/d+h5vN7EUzS+303ENeTLvM7OrBiKm3uDo9900zc2aW6a0PybnqKy4z+7p3TraZ2b912n5q58s5p59efgg2lu8FpgBxwCZgpk+x5ADzvOXRwG5gJvBvwIPe9geBR3yI7QHgaeAVb30pcLu3/J/AV32I6UngK95yHJDq97kieAPofiCx03n6sh/nC7gcmAds7bStx/MDXAu8SvD+rwuBNUMY01VAjLf8SKeYZnqfx3hgsvc5jR6quLztEwh2qjkIZA7luerjfH0aeBOI99bHnu75GrIPxnD8AS4CVnZafwh4yO+4vFheIjg21S4gx9uWA+wa4jjygFXAZ4BXvA/F0U4f6JPO4RDFNMb7ErYu2/0+Vx0jCaQT7AzyCnC1X+cLyO/yxdLj+QF+BdzRU7nBjqnLczcCv/eWT/osel/SFw3VufK2LQPOAw50Sg5Ddq56+R0uBT7XQ7lTPl+qVupbT0OH+D7YvJnlA3OBNcA451wZgPc4dojD+Q/g74GO2eQzgOPOuY7Jkv04Z1OACuC/vequ/zKzUfh8rpxzJcD/BQ4BZUA1sA7/z1eH3s5PuHwO/pzgf+Xgc0xmdj1Q4pzb1OUpv8/VDOAyr5ryXTO74HTjUnLoW0+jX/navcvMkoHngb91ztX4HMt1QLlzbl3nzT0UHepzFkPwcvuXzrm5QD3BahJfeXX4Swhe1o8HRgHX9FA03LoQ+v47NbNvE7xX6vcdm3ooNiQxmVkS8G3guz093cO2oTxXMUAawSqtbwFLzcxOJy4lh76FMnTIkDGzWIKJ4ffOuRe8zUfMLMd7PgcoH8KQLgGuN7MDwLMEq5b+A0i14DAq4M85KwaKnXNrvPVlBJOFn+cK4HPAfudchXOuFXgBuBj/z1eH3s6Pr58DM7sXuA64y3l1Ij7HNJVggt/k/e3nAevNLNvnuPBe/wUX9AnBK/rM04lLyaFvYTPMh5f9Hwd2OOd+1OmpzkOX3EuwLWJIOOcecs7lOefyCZ6bt5xzdwFvExxGZchj8uI6DBSZ2Vneps8SvEvft3PlOQRcaGZJ3u+zIy5fz1cnvZ2f5cA9Xk+cC4HqjuqnwWbBScH+D3C9c66hS6w9Dc0z6JxzW5xzY51z+d7ffjHBziKH8fFcef5I8J80LDh3ThzBNq1TP1+D1VAyUn4I9j7YTbB1/9s+xnEpwcvAzcBG7+dagnX8q4A93mO6T/Fdwf/0Vpri/eEVAn/A6zkxxPGcDxR45+uPBC+1fT9XwD8BO4GtwFMEe48M+fkiOLVIGdBK8Mvtvt7OD8EqiZ97n4EtwIIhjKmQYF15x9/8f3Yq/20vpl3ANUN5rro8f4D/aZAeknPVx/mKA37n/X2tBz5zuudLd0iLiEg3qlYSEZFulBxERKQbJQcREelGyUFERLpRchARkW6UHEREpBslBxmxvGG7/6qfMvlmdmcIx8rvachmkZFKyUFGslSgz+RAcFTLfpPDUOs0nIaIL5QcZCT7ITDVzDZ6k8b8uzfJzhYzu61Tmcu8Mt/wrhDeN7P13s/FobyQmc0ys0+842w2s+ne9nu89U1m9pS3bZKZrfK2rzKzid72J8zsR2b2NvCImY3yJnRZ640uu2TgT5FIz3SHtIxY3tDmrzjnZpvZzcD/AhYTHIhsLbAIOAv4pnPuOm+fJCDgnGvyvuCfcc4t6HysXl7rp8DHzrnfe+NwRRMcFuMF4BLn3FEzS3fOVZrZy8Ay59yTZvbnBMcNusHMnvBiW+KcazezfwG2O+d+Z8EZ0D4B5jrn6gf+bImcTJeuEikuJfhF305w9NF3gQuArsOexwI/M7PzgXaC4+OHYjXwbTPLIzgq5h4z+wzBJHAUwDlX6ZW9CLjJW36K4AxsHf7gxQjBWdCuN7NveusJwERgR4gxiZw2JQeJFD2NZ9+TbwBHCM7wFQU0hbKTc+5pM1sDfB5YaWZf8V4zlEvzzmU6XxUYcLNzblcoMYgMJLU5yEhWS3C+bYD3gNvMLNrMsgjOv/tJlzIAKUCZcy4A3E2weqhfZjYF2Oece5Tg8MjnEhzZ9ItmluGVSfeKf0RwiHOAu4APejnsSuDr3vDemNncUGIRGQi6cpARyzl3zMw+9Lqgvkpw+O5NBP9T/3vn3GEzOwa0mdkm4AngF8DzZnYrwXkWQq3fvw34kpm1AoeBh732hR8A75pZO7AB+DLwN8BvzOxbBKcz/bNejvnPBCdP2uwliAMEJ70RGXRqkBYRkW5UrSQiIt2oWknkFJjZ1cAjXTbvd87d6Ec8IoNF1UoiItKNqpVERKQbJQcREelGyUFERLpRchARkW6UHEREpJv/D0V2/QXIPSJSAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Use seaborn distplot to plot the distribution of total_score\n",
    "# Turn on rug to show each game's total score\n",
    "_ = sns.distplot(df_cfb.total_score, rug=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Part 2: World Bank Data\n",
    "\n",
    "This data is provided by World Bank Open Data https://data.worldbank.org/.\n",
    "\n",
    "It includes many country data indicators sampled over time.\n",
    "\n",
    "There are two files we are interested in:\n",
    "1. WDICountry.csv includes country and region information, one country or region per row.\n",
    "2. WDIData.csv includes indicator data, one row per country and indicator.\n",
    "\n",
    "We would like to be able to analyze a few indicators for countries grouped by region.  \n",
    "To do that we will need to clean and join the two sets of records."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Part 2a: Munge WDICountry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "df_country has 263 rows\n"
     ]
    }
   ],
   "source": [
    "# Read Country information from '../data/WDICountry.csv' into df_country\n",
    "# print the number of rows in df_country\n",
    "df_country = pd.read_csv('../data/WDICountry.csv')\n",
    "print('df_country has {} rows'.format(len(df_country)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Country Code</th>\n",
       "      <th>Short Name</th>\n",
       "      <th>Table Name</th>\n",
       "      <th>Long Name</th>\n",
       "      <th>2-alpha code</th>\n",
       "      <th>Currency Unit</th>\n",
       "      <th>Special Notes</th>\n",
       "      <th>Region</th>\n",
       "      <th>Income Group</th>\n",
       "      <th>WB-2 code</th>\n",
       "      <th>National accounts base year</th>\n",
       "      <th>National accounts reference year</th>\n",
       "      <th>SNA price valuation</th>\n",
       "      <th>Lending category</th>\n",
       "      <th>Other groups</th>\n",
       "      <th>System of National Accounts</th>\n",
       "      <th>Alternative conversion factor</th>\n",
       "      <th>PPP survey year</th>\n",
       "      <th>Balance of Payments Manual in use</th>\n",
       "      <th>External debt Reporting status</th>\n",
       "      <th>System of trade</th>\n",
       "      <th>Government Accounting concept</th>\n",
       "      <th>IMF data dissemination standard</th>\n",
       "      <th>Latest population census</th>\n",
       "      <th>Latest household survey</th>\n",
       "      <th>Source of most recent Income and expenditure data</th>\n",
       "      <th>Vital registration complete</th>\n",
       "      <th>Latest agricultural census</th>\n",
       "      <th>Latest industrial data</th>\n",
       "      <th>Latest trade data</th>\n",
       "      <th>Latest water withdrawal data</th>\n",
       "      <th>Unnamed: 31</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ABW</td>\n",
       "      <td>Aruba</td>\n",
       "      <td>Aruba</td>\n",
       "      <td>Aruba</td>\n",
       "      <td>AW</td>\n",
       "      <td>Aruban florin</td>\n",
       "      <td>SNA data for 2000-2011 are updated from official government statistics; 1994-1999 from UN databa...</td>\n",
       "      <td>Latin America &amp; Caribbean</td>\n",
       "      <td>High income</td>\n",
       "      <td>AW</td>\n",
       "      <td>2000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Value added at basic prices (VAB)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Country uses the 1993 System of National Accounts methodology.</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2011</td>\n",
       "      <td>IMF Balance of Payments Manual, 6th edition.</td>\n",
       "      <td>NaN</td>\n",
       "      <td>General trade system</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2010</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Yes</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AFG</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>Islamic State of Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>Afghan afghani</td>\n",
       "      <td>Fiscal year end: March 20; reporting period for national accounts data is calendar year, estimat...</td>\n",
       "      <td>South Asia</td>\n",
       "      <td>Low income</td>\n",
       "      <td>AF</td>\n",
       "      <td>2002/03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Value added at basic prices (VAB)</td>\n",
       "      <td>IDA</td>\n",
       "      <td>HIPC</td>\n",
       "      <td>Country uses the 1993 System of National Accounts methodology.</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IMF Balance of Payments Manual, 6th edition.</td>\n",
       "      <td>Actual</td>\n",
       "      <td>General trade system</td>\n",
       "      <td>Consolidated central government</td>\n",
       "      <td>Enhanced General Data Dissemination System (e-GDDS)</td>\n",
       "      <td>1979</td>\n",
       "      <td>Demographic and Health Survey, 2015</td>\n",
       "      <td>Integrated household survey (IHS), 2011</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>2000.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AGO</td>\n",
       "      <td>Angola</td>\n",
       "      <td>Angola</td>\n",
       "      <td>People's Republic of Angola</td>\n",
       "      <td>AO</td>\n",
       "      <td>Angolan kwanza</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sub-Saharan Africa</td>\n",
       "      <td>Lower middle income</td>\n",
       "      <td>AO</td>\n",
       "      <td>2002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Value added at producer prices (VAP)</td>\n",
       "      <td>IBRD</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Country uses the 1993 System of National Accounts methodology.</td>\n",
       "      <td>1991–96</td>\n",
       "      <td>2011</td>\n",
       "      <td>IMF Balance of Payments Manual, 6th edition.</td>\n",
       "      <td>Actual</td>\n",
       "      <td>Special trade system</td>\n",
       "      <td>Budgetary central government</td>\n",
       "      <td>Enhanced General Data Dissemination System (e-GDDS)</td>\n",
       "      <td>2014</td>\n",
       "      <td>Demographic and Health Survey, 2015/16</td>\n",
       "      <td>Integrated household survey (IHS), 2008/09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>2005.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Country Code   Short Name   Table Name                     Long Name  \\\n",
       "0          ABW        Aruba        Aruba                         Aruba   \n",
       "1          AFG  Afghanistan  Afghanistan  Islamic State of Afghanistan   \n",
       "2          AGO       Angola       Angola   People's Republic of Angola   \n",
       "\n",
       "  2-alpha code   Currency Unit  \\\n",
       "0           AW   Aruban florin   \n",
       "1           AF  Afghan afghani   \n",
       "2           AO  Angolan kwanza   \n",
       "\n",
       "                                                                                         Special Notes  \\\n",
       "0  SNA data for 2000-2011 are updated from official government statistics; 1994-1999 from UN databa...   \n",
       "1  Fiscal year end: March 20; reporting period for national accounts data is calendar year, estimat...   \n",
       "2                                                                                                  NaN   \n",
       "\n",
       "                      Region         Income Group WB-2 code  \\\n",
       "0  Latin America & Caribbean          High income        AW   \n",
       "1                 South Asia           Low income        AF   \n",
       "2         Sub-Saharan Africa  Lower middle income        AO   \n",
       "\n",
       "  National accounts base year  National accounts reference year  \\\n",
       "0                        2000                               NaN   \n",
       "1                     2002/03                               NaN   \n",
       "2                        2002                               NaN   \n",
       "\n",
       "                    SNA price valuation Lending category Other groups  \\\n",
       "0     Value added at basic prices (VAB)              NaN          NaN   \n",
       "1     Value added at basic prices (VAB)              IDA         HIPC   \n",
       "2  Value added at producer prices (VAP)             IBRD          NaN   \n",
       "\n",
       "                                      System of National Accounts  \\\n",
       "0  Country uses the 1993 System of National Accounts methodology.   \n",
       "1  Country uses the 1993 System of National Accounts methodology.   \n",
       "2  Country uses the 1993 System of National Accounts methodology.   \n",
       "\n",
       "  Alternative conversion factor PPP survey year  \\\n",
       "0                           NaN            2011   \n",
       "1                           NaN             NaN   \n",
       "2                       1991–96            2011   \n",
       "\n",
       "              Balance of Payments Manual in use  \\\n",
       "0  IMF Balance of Payments Manual, 6th edition.   \n",
       "1  IMF Balance of Payments Manual, 6th edition.   \n",
       "2  IMF Balance of Payments Manual, 6th edition.   \n",
       "\n",
       "  External debt Reporting status       System of trade  \\\n",
       "0                            NaN  General trade system   \n",
       "1                         Actual  General trade system   \n",
       "2                         Actual  Special trade system   \n",
       "\n",
       "     Government Accounting concept  \\\n",
       "0                              NaN   \n",
       "1  Consolidated central government   \n",
       "2     Budgetary central government   \n",
       "\n",
       "                       IMF data dissemination standard  \\\n",
       "0                                                  NaN   \n",
       "1  Enhanced General Data Dissemination System (e-GDDS)   \n",
       "2  Enhanced General Data Dissemination System (e-GDDS)   \n",
       "\n",
       "  Latest population census                 Latest household survey  \\\n",
       "0                     2010                                     NaN   \n",
       "1                     1979     Demographic and Health Survey, 2015   \n",
       "2                     2014  Demographic and Health Survey, 2015/16   \n",
       "\n",
       "  Source of most recent Income and expenditure data  \\\n",
       "0                                               NaN   \n",
       "1           Integrated household survey (IHS), 2011   \n",
       "2        Integrated household survey (IHS), 2008/09   \n",
       "\n",
       "  Vital registration complete Latest agricultural census  \\\n",
       "0                         Yes                        NaN   \n",
       "1                         NaN                        NaN   \n",
       "2                         NaN                        NaN   \n",
       "\n",
       "   Latest industrial data  Latest trade data  Latest water withdrawal data  \\\n",
       "0                     NaN             2015.0                           NaN   \n",
       "1                     NaN             2015.0                        2000.0   \n",
       "2                     NaN             2015.0                        2005.0   \n",
       "\n",
       "   Unnamed: 31  \n",
       "0          NaN  \n",
       "1          NaN  \n",
       "2          NaN  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Print the first 3 rows of WDICountry\n",
    "df_country.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "df_country has 32 columns\n"
     ]
    }
   ],
   "source": [
    "# Using .columns, how many columns does WDICountry have?\n",
    "print('df_country has {} columns'.format(len(df_country.columns)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "df_country has 4 columns\n"
     ]
    }
   ],
   "source": [
    "# We'll only keep a few columns: ['Country Code','Short Name','Region','Income Group']\n",
    "# Overwrite df_country with a new dataframe containing only these columns\n",
    "# Print out the statement 'df_country has {} columns' using .format to confirm that there are only 4 columns\n",
    "df_country = df_country.loc[:,['Country Code','Short Name','Region','Income Group']]\n",
    "print('df_country has {} columns'.format(df_country.shape[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 263 entries, 0 to 262\n",
      "Data columns (total 4 columns):\n",
      "Country Code    263 non-null object\n",
      "Short Name      263 non-null object\n",
      "Region          217 non-null object\n",
      "Income Group    217 non-null object\n",
      "dtypes: object(4)\n",
      "memory usage: 8.3+ KB\n"
     ]
    }
   ],
   "source": [
    "# Examine df_country using .info\n",
    "df_country.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "there are 46 rows with missing data, 0.17 of the dataset\n"
     ]
    }
   ],
   "source": [
    "# There are some rows with missing Region and Income Group.\n",
    "# Print out both the number and proportion of rows with missing Region information\n",
    "n_missing = sum(df_country.Region.isnull())\n",
    "prop_missing = n_missing / len(df_country)\n",
    "print('there are {:} rows with missing data, {:0.2} of the dataset'.format(n_missing,prop_missing))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 217 entries, 0 to 262\n",
      "Data columns (total 4 columns):\n",
      "Country Code    217 non-null object\n",
      "Short Name      217 non-null object\n",
      "Region          217 non-null object\n",
      "Income Group    217 non-null object\n",
      "dtypes: object(4)\n",
      "memory usage: 8.5+ KB\n"
     ]
    }
   ],
   "source": [
    "# Drop the rows of df_country with any null values (using inplace=True)\n",
    "# Use .info to make sure there a no longer null values\n",
    "df_country.dropna(inplace=True)\n",
    "df_country.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each row of df_country should be a separate country\n",
    "# Assert that there are no duplicates (use len and drop_duplicates)\n",
    "# hint: df_country should be the same length before and after dropping duplicate rows\n",
    "assert len(df_country.drop_duplicates()) == len(df_country)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assert that 'Country Code' is unique (use len and unique)\n",
    "# hist: the number of unique country codes should be the same length as the dataframe\n",
    "assert len(df_country.loc[:,'Country Code'].unique()) == len(df_country)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Short Name</th>\n",
       "      <th>Region</th>\n",
       "      <th>Income Group</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Country Code</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ABW</th>\n",
       "      <td>Aruba</td>\n",
       "      <td>Latin America &amp; Caribbean</td>\n",
       "      <td>High income</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AFG</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>South Asia</td>\n",
       "      <td>Low income</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AGO</th>\n",
       "      <td>Angola</td>\n",
       "      <td>Sub-Saharan Africa</td>\n",
       "      <td>Lower middle income</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ALB</th>\n",
       "      <td>Albania</td>\n",
       "      <td>Europe &amp; Central Asia</td>\n",
       "      <td>Upper middle income</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AND</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>Europe &amp; Central Asia</td>\n",
       "      <td>High income</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Short Name                     Region         Income Group\n",
       "Country Code                                                             \n",
       "ABW                 Aruba  Latin America & Caribbean          High income\n",
       "AFG           Afghanistan                 South Asia           Low income\n",
       "AGO                Angola         Sub-Saharan Africa  Lower middle income\n",
       "ALB               Albania      Europe & Central Asia  Upper middle income\n",
       "AND               Andorra      Europe & Central Asia          High income"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Set the index of df_country to be 'Country Code' (inplace) and display the first 5 rows\n",
    "df_country.set_index('Country Code', inplace=True)\n",
    "df_country.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Part 2b: Munge WDIData"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Now we'll load the other country data we're interested in.\n",
    "# Read csv '../data/WDIData.csv.zip' into df_data\n",
    "# Note: \n",
    "#  Since this file is large it is stored as a zip.\n",
    "#  You don't need to decompress the zip file first, pandas will handle this for you.\n",
    "df_data = pd.read_csv('../data/WDIData.csv.zip')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Display .info for df_data\n",
    "# Note that the data is in long format instead of wide format\n",
    "df_data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use pprint.pprint to print a list of the unique values in Indicator Name\n",
    "# These are all of the available data points in this file, which is why it's so large.\n",
    "# (to see the difference, try using the standard 'print' first)\n",
    "pprint.pprint(list(df_data.loc[:,'Indicator Name'].unique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We'll only keep a few of these indicators.\n",
    "# Create a list of indicators to keep that includes these indicators:\n",
    "# 'Employment to population ratio, 15+, female (%) (modeled ILO estimate)','GDP (constant 2010 US$)','Population, total','Population density (people per sq. km of land area)','Unemployment, total (% of total labor force) (national estimate)'\n",
    "data_indicators_to_keep = ['Employment to population ratio, 15+, female (%) (modeled ILO estimate)','GDP (constant 2010 US$)','Population, total','Population density (people per sq. km of land area)','Unemployment, total (% of total labor force) (national estimate)']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The columns of WIData contain information for each year.\n",
    "# We'll look at only year 2016\n",
    "# Create a list of columns to keep that includes these columns:\n",
    "# 'Country Code','Indicator Name','2016'\n",
    "data_columns_to_keep = ['Country Code','Indicator Name','2016']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overwrite df_data witha dataframe containing only: \n",
    "#  the rows whose 'Indicator Name' is in data_indicators_to_keep (use .isin) and \n",
    "#  the columns in data_columns_to_keep\n",
    "df_data = df_data.loc[df_data['Indicator Name'].isin(data_indicators_to_keep),data_columns_to_keep]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the first 5 rows of df_data\n",
    "df_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Into df_data_pivot, pivot df_data with index 'Country Code', columns 'Indicator Name' and values '2016'\n",
    "# Display the first 5 rows of df_data_pivot\n",
    "df_data_pivot = df_data.pivot(index='Country Code',columns='Indicator Name',values='2016')\n",
    "df_data_pivot.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Part 2c: Join the two datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Importantly, df_country and df_data_pivot now have the the same index values.\n",
    "# To see this, use .index to print out the first 5 index values in df_country and df_data\n",
    "print(df_country.index[:5])\n",
    "print(df_data_pivot.index[:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Into df_wdi put the inner join of df_country with df_data_pivot using .join\n",
    "# Display the first 5 rows\n",
    "df_wdi = df_country.join(df_data_pivot, how='inner')\n",
    "df_wdi.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assert that the number of rows matches the number of unique 'Short Name'\n",
    "assert len(df_wdi) == len(df_wdi.loc[:,'Short Name'].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Part 2d: Analysis and Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the number of countries per region seen in df_wdi (use .value_counts)\n",
    "df_wdi['Region'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# What proportion of our dataset is made up by each region?\n",
    "# Hint: Divide the previous counts by the number of rows.\n",
    "df_wdi['Region'].value_counts() / len(df_wdi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the summary stats (means and quartiles) for 'Employment to population ratio, 15+, female (%) (modeled ILO estimate)'\n",
    "df_wdi['Employment to population ratio, 15+, female (%) (modeled ILO estimate)'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's rename that column to something shorter.\n",
    "# Use .rename to rename \n",
    "#  'Employment to population ratio, 15+, female (%) (modeled ILO estimate)' \n",
    "#  to \n",
    "#  'Female Employment Ratio'\n",
    "# Display the columns to confirm.\n",
    "df_wdi.rename({'Employment to population ratio, 15+, female (%) (modeled ILO estimate)':'Female Employment To Population Ratio'},\n",
    "              axis='columns',\n",
    "              inplace=True\n",
    "             )\n",
    "df_wdi.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use seaborn .catplot to display box plots of 'Female Employment to Population Ratio' for each Region\n",
    "# Since the region names are long, we'll use horizontal box plots.\n",
    "# Put 'Female Employment To Population Ratio' on the x-axis and 'Region' on the y-axis.\n",
    "# Set 'aspect' to 2 to widen the plot\n",
    "_ = sns.catplot(x='Female Employment To Population Ratio',\n",
    "                y='Region',\n",
    "                data=df_wdi,\n",
    "                kind='box',\n",
    "                aspect=2\n",
    "               )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py36",
   "language": "python",
   "name": "py36"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
